In [1]:
import numpy as np
import pandas as pd
from sklearn.utils import resample

original data


In [2]:
# Toy dataset: the integers 0..44 laid out row by row, three values per row
# (the -1 lets numpy infer the row count).
mat = np.reshape(np.arange(45), (-1, 3))
mat


Out[2]:
array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14],
       [15, 16, 17],
       [18, 19, 20],
       [21, 22, 23],
       [24, 25, 26],
       [27, 28, 29],
       [30, 31, 32],
       [33, 34, 35],
       [36, 37, 38],
       [39, 40, 41],
       [42, 43, 44]])

Empty (all-NaN) DataFrame that will hold the bootstrapped dataset


In [3]:
# Pre-allocate an all-NaN frame (1500 rows, columns a/b/c) that the
# bootstrap cells fill in afterwards.
df = pd.DataFrame(
    data=np.nan,
    index=range(1500),
    columns=["a", "b", "c"],
)

Simple bootstrap with replacement: fill the DataFrame five rows at a time, each batch resampled from the original data


In [4]:
# Fill df with bootstrap draws (sampling with replacement) from mat,
# one fixed-size batch of rows at a time.
batch_size = 5                  # rows drawn from mat per resample call
# One generator shared by every batch: re-running the cell reproduces the same
# data, while successive batches still differ because the generator advances.
rng = np.random.RandomState(0)
# Stride over df itself so the number of batches follows len(df) instead of a
# hard-coded count that has to be kept in sync with the frame's size.
for start in range(0, len(df), batch_size):
    stop = min(start + batch_size, len(df))   # last batch may be shorter
    df.iloc[start:stop, :] = resample(
        mat, n_samples=stop - start, replace=True, random_state=rng
    )
df.head()


Out[4]:
a b c
0 27.0 28.0 29.0
1 0.0 1.0 2.0
2 0.0 1.0 2.0
3 42.0 43.0 44.0
4 27.0 28.0 29.0

Simple bootstrap with replacement plus added noise: a random integer (0, 1 or 2) is added to every resampled value


In [5]:
# Overwrite df with bootstrap draws (sampling with replacement) from mat plus
# integer noise, one fixed-size batch of rows at a time. Any values df held
# before this cell are replaced.
batch_size = 5                  # rows drawn from mat per resample call
noise_high = 3                  # noise is an integer in [0, noise_high), i.e. 0, 1 or 2
# NOTE(review): the noise is never negative, so it shifts values upward
# (by 1 on average) rather than jittering around them - confirm this is intended.
# One generator shared by the resampling and the noise: re-running the cell
# reproduces the same data, while successive batches still differ.
rng = np.random.RandomState(1)
# Stride over df itself so the number of batches follows len(df) instead of a
# hard-coded count that has to be kept in sync with the frame's size.
for start in range(0, len(df), batch_size):
    stop = min(start + batch_size, len(df))   # last batch may be shorter
    rows = resample(mat, n_samples=stop - start, replace=True, random_state=rng)
    noise = rng.randint(noise_high, size=rows.shape)
    df.iloc[start:stop, :] = rows + noise
df.head()


Out[5]:
a b c
0 41.0 41.0 41.0
1 5.0 6.0 5.0
2 44.0 45.0 45.0
3 23.0 22.0 24.0
4 34.0 34.0 36.0